xend: passthrough: check if a device is behind PCIe switch that lacks ACS
authorKeir Fraser <keir.fraser@citrix.com>
Wed, 19 Aug 2009 12:12:16 +0000 (13:12 +0100)
committerKeir Fraser <keir.fraser@citrix.com>
Wed, 19 Aug 2009 12:12:16 +0000 (13:12 +0100)
Imagine a PCIe switch that doesn't support ACS (Access Control
Services) and has 2 downstream ports, A and B. According to the PCIe
spec, the switch should directly route a transaction that comes from A
and targets a device under B -- the Root Complex and IOMMU engine are
bypassed. This doesn't work at all in the case of an hvm guest and can
even cause a potential security issue, so we should not allow this kind
of device assignment.

If all the intermediate PCIe switches between a device and the Root
Complex support and enable ACS, we can safely assign the device to a guest.

Cc: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
tools/python/xen/util/pci.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/server/pciif.py

index 2ca4b3f25b22e85fc55cdcb41e7d040df38abb44..62bede17d494cb7b0ac5b4a662887ba8630eab34 100644 (file)
@@ -70,12 +70,18 @@ PCI_BRIDGE_CTL_BUS_RESET= 0x40
 PCI_CAP_ID_EXP = 0x10
 PCI_EXP_FLAGS  = 0x2
 PCI_EXP_FLAGS_TYPE = 0x00f0
+PCI_EXP_TYPE_DOWNSTREAM = 0x6
 PCI_EXP_TYPE_PCI_BRIDGE = 0x7
 PCI_EXP_DEVCAP = 0x4
 PCI_EXP_DEVCAP_FLR = (0x1 << 28)
 PCI_EXP_DEVCTL = 0x8
 PCI_EXP_DEVCTL_FLR = (0x1 << 15)
 
+PCI_EXT_CAP_ID_ACS = 0x000d  # Extended capability ID looked up via find_ext_cap() to locate ACS.
+PCI_EXT_CAP_ACS_ENABLED = 0x1d  # The bits V, R, C, U; all must be set in the ACS control word.
+PCI_EXT_ACS_CTRL = 0x06  # Offset of the 16-bit ACS control word from the capability header.
+
+
 PCI_CAP_ID_PM = 0x01
 PCI_PM_CTRL = 4
 PCI_PM_CTRL_NO_SOFT_RESET = 0x0008
@@ -656,10 +662,15 @@ class PciDevice:
         self.subvendorname = ""
         self.subdevicename = ""
         self.dev_type = None
+        self.is_downstream_port = False
+        self.acs_enabled = False
         self.has_non_page_aligned_bar = False
         self.pcie_flr = False
         self.pci_af_flr = False
         self.detect_dev_info()
+        if (self.dev_type == DEV_TYPE_PCI_BRIDGE) or \
+            (self.dev_type == DEV_TYPE_PCIe_BRIDGE):
+            return
         self.get_info_from_sysfs()
         self.get_info_from_lspci()
 
@@ -877,6 +888,51 @@ class PciDevice:
                 (strerr, errno)))
         return pos
 
+    def find_ext_cap(self, cap):
+        path = find_sysfs_mnt()+SYSFS_PCI_DEVS_PATH+'/'+ \
+               self.name+SYSFS_PCI_DEV_CONFIG_PATH
+
+        ttl = 480; # 3840 bytes, minimum 8 bytes per capability
+        pos = 0x100
+
+        try:
+            fd = os.open(path, os.O_RDONLY)
+            os.lseek(fd, pos, 0)
+            h = os.read(fd, 4)
+            if len(h) == 0: # MMCONF is not enabled?
+                return 0
+            header = struct.unpack('I', h)[0]
+            if header == 0 or header == -1:
+                return 0
+
+            while ttl > 0:
+                if (header & 0x0000ffff) == cap:
+                    return pos
+                pos = (header >> 20) & 0xffc
+                if pos < 0x100:
+                    break
+                os.lseek(fd, pos, 0)
+                header = struct.unpack('I', os.read(fd, 4))[0]
+                ttl = ttl - 1
+            os.close(fd)
+        except OSError, (errno, strerr):
+            raise PciDeviceParseError(('Error when accessing sysfs: %s (%d)' %
+                (strerr, errno)))
+        return 0
+
+    def is_behind_switch_lacking_acs(self):
+        # If there is intermediate PCIe switch, which doesn't support ACS or
+        # doesn't enable ACS, between Root Complex and the function, we return
+        # True,  meaning the function is not allowed to be assigned to guest due
+        # to potential security issue.
+        parent = self.find_parent()
+        while parent is not None:
+            dev_parent = PciDevice(parent)
+            if dev_parent.is_downstream_port and not dev_parent.acs_enabled:
+                return True
+            parent = dev_parent.find_parent()
+        return False
+
     def pci_conf_read8(self, pos):
         fd = os.open(self.cfg_space_path, os.O_RDONLY)
         os.lseek(fd, pos, 0)
@@ -936,11 +992,19 @@ class PciDevice:
                 self.dev_type = DEV_TYPE_PCI_BRIDGE
             else:
                 creg = self.pci_conf_read16(pos + PCI_EXP_FLAGS)
-                if ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == \
-                    PCI_EXP_TYPE_PCI_BRIDGE:
+                type = (creg & PCI_EXP_FLAGS_TYPE) >> 4
+                if type == PCI_EXP_TYPE_PCI_BRIDGE:
                     self.dev_type = DEV_TYPE_PCI_BRIDGE
                 else:
                     self.dev_type = DEV_TYPE_PCIe_BRIDGE
+                    if type == PCI_EXP_TYPE_DOWNSTREAM:
+                        self.is_downstream_port = True
+                        pos = self.find_ext_cap(PCI_EXT_CAP_ID_ACS)
+                        if pos != 0:
+                            ctrl = self.pci_conf_read16(pos + PCI_EXT_ACS_CTRL)
+                            if (ctrl & PCI_EXT_CAP_ACS_ENABLED) == \
+                                PCI_EXT_CAP_ACS_ENABLED
+                                self.acs_enabled = True
         else:
             if  pos != 0:
                 self.dev_type = DEV_TYPE_PCIe_ENDPOINT
index cdf650843f17275b9e20b09ff693b3db9fe169a5..f15bcf13ef590e0351358a6b2b77c026a109875a 100644 (file)
@@ -711,6 +711,14 @@ class XendDomainInfo:
         if not self.info.is_hvm():
             return
 
+        # Check if there is an intermediate PCIe switch between the device and
+        # the Root Complex.
+        if pci_device.is_behind_switch_lacking_acs():
+            err_msg = 'pci: to avoid potential security issue, %s is not'+\
+                    ' allowed to be assigned to guest since it is behind'+\
+                    ' PCIe switch that does not support or enable ACS.'
+            raise VmError(err_msg % pci_device.name)
+
         # Check the co-assignment.
         # To pci-attach a device D to domN, we should ensure each of D's
         # co-assignment devices hasn't been assigned, or has been assigned to
index 3f4e8aca56152870a5c49c7c61d183c71acd44dd..018309c3de678762ec40aac7ecc27005396531a4 100644 (file)
@@ -374,6 +374,15 @@ class PciController(DevController):
             except Exception, e:
                 raise VmError("pci: failed to locate device and "+
                         "parse its resources - "+str(e))
+
+            # Check if there is an intermediate PCIe switch between the device and
+            # the Root Complex.
+            if self.vm.info.is_hvm() and dev.is_behind_switch_lacking_acs():
+                err_msg = 'pci: to avoid potential security issue, %s is not'+\
+                        ' allowed to be assigned to guest since it is behind'+\
+                        ' PCIe switch that does not support or enable ACS.'
+                raise VmError(err_msg % dev.name)
+
             if (dev.dev_type == DEV_TYPE_PCIe_ENDPOINT) and not dev.pcie_flr:
                 if dev.bus == 0:
                     # We cope with this case by using the Dstate transition